{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### DataFrame的apply操作：指定DataFrame可以通过apply方法让df中每一列或者每一行都执行自定义方法"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "dataframe.apply(function,axis)对一行或一列做出一些操作（axis=1则为对某一列进行操作，此时，apply函数每次将dataframe的一行传给function，然后获取返回值，将返回值放入一个series）\n",
    "python去空格:字符串.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1         0       3   \n",
       "1            2         1       1   \n",
       "2            3         1       3   \n",
       "3            4         1       1   \n",
       "4            5         0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500   NaN        S  \n",
       "1      0          PC 17599  71.2833   C85        C  \n",
       "2      0  STON/O2. 3101282   7.9250   NaN        S  \n",
       "3      0            113803  53.1000  C123        S  \n",
       "4      0            373450   8.0500   NaN        S  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('./data/titanic_train.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 891 entries, 0 to 890\n",
      "Data columns (total 12 columns):\n",
      "PassengerId    891 non-null int64\n",
      "Survived       891 non-null int64\n",
      "Pclass         891 non-null int64\n",
      "Name           891 non-null object\n",
      "Sex            891 non-null object\n",
      "Age            714 non-null float64\n",
      "SibSp          891 non-null int64\n",
      "Parch          891 non-null int64\n",
      "Ticket         891 non-null object\n",
      "Fare           891 non-null float64\n",
      "Cabin          204 non-null object\n",
      "Embarked       889 non-null object\n",
      "dtypes: float64(2), int64(5), object(5)\n",
      "memory usage: 83.6+ KB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 891 entries, 0 to 890\n",
      "Data columns (total 12 columns):\n",
      "PassengerId    891 non-null int64\n",
      "Survived       891 non-null int64\n",
      "Pclass         891 non-null int64\n",
      "Name           891 non-null object\n",
      "Sex            891 non-null object\n",
      "Age            714 non-null float64\n",
      "SibSp          891 non-null int64\n",
      "Parch          891 non-null int64\n",
      "Ticket         891 non-null object\n",
      "Fare           891 non-null float64\n",
      "Cabin          204 non-null object\n",
      "Embarked       889 non-null object\n",
      "dtypes: float64(2), int64(5), object(5)\n",
      "memory usage: 318.5 KB\n"
     ]
    }
   ],
   "source": [
    "df.info(memory_usage='deep')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# apply 自定义函数执行运用的时候，它会将按照每一行去执行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PassengerId                  100\n",
       "Survived                       0\n",
       "Pclass                         2\n",
       "Name           Kantor, Mr. Sinai\n",
       "Sex                         male\n",
       "Age                           34\n",
       "SibSp                          1\n",
       "Parch                          0\n",
       "Ticket                    244367\n",
       "Fare                          26\n",
       "Cabin                        NaN\n",
       "Embarked                       S\n",
       "dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def hundredth_row(columns):\n",
    "    item=columns.iloc[99]\n",
    "    return item\n",
    "hundredth_row=df.apply(hundredth_row)\n",
    "hundredth_row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series=df['PassengerId']\n",
    "item=series.iloc[99]\n",
    "item"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 注意事项len(a) 如果a=NAN 统计结果为1，相当于count作用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PassengerId    False\n",
       "Survived       False\n",
       "Pclass         False\n",
       "Name           False\n",
       "Sex            False\n",
       "Age            False\n",
       "SibSp          False\n",
       "Parch          False\n",
       "Ticket         False\n",
       "Fare           False\n",
       "Cabin           True\n",
       "Embarked       False\n",
       "Name: 0, dtype: bool"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    " columns_null= pd.isnull(df.iloc[0])\n",
    " columns_null"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Cabin    NaN\n",
       "Name: 0, dtype: object"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[0][columns_null]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df.iloc[0][columns_null])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#统计df中每一列不为空的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PassengerId      0\n",
       "Survived         0\n",
       "Pclass           0\n",
       "Name             0\n",
       "Sex              0\n",
       "Age            177\n",
       "SibSp            0\n",
       "Parch            0\n",
       "Ticket           0\n",
       "Fare             0\n",
       "Cabin          687\n",
       "Embarked         2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def not_null_count(columns):\n",
    "    columns_null = pd.isnull(columns)\n",
    "    null = columns[columns_null]\n",
    "    return len(null)\n",
    "columns_null_count = df.apply(not_null_count)\n",
    "columns_null_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "  series=df['PassengerId']\n",
    "    columns_null = pd.isnull(series)\n",
    "    len(series[columns_null]) \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "687"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "    series=df['Cabin']\n",
    "    columns_null = pd.isnull(series)\n",
    "    len(series[columns_null]) \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 横着统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       Third class\n",
       "1       First class\n",
       "2       Third class\n",
       "3       First class\n",
       "4       Third class\n",
       "5       Third class\n",
       "6       First class\n",
       "7       Third class\n",
       "8       Third class\n",
       "9      Second class\n",
       "10      Third class\n",
       "11      First class\n",
       "12      Third class\n",
       "13      Third class\n",
       "14      Third class\n",
       "15     Second class\n",
       "16      Third class\n",
       "17     Second class\n",
       "18      Third class\n",
       "19      Third class\n",
       "20     Second class\n",
       "21     Second class\n",
       "22      Third class\n",
       "23      First class\n",
       "24      Third class\n",
       "25      Third class\n",
       "26      Third class\n",
       "27      First class\n",
       "28      Third class\n",
       "29      Third class\n",
       "           ...     \n",
       "861    Second class\n",
       "862     First class\n",
       "863     Third class\n",
       "864    Second class\n",
       "865    Second class\n",
       "866    Second class\n",
       "867     First class\n",
       "868     Third class\n",
       "869     Third class\n",
       "870     Third class\n",
       "871     First class\n",
       "872     First class\n",
       "873     Third class\n",
       "874    Second class\n",
       "875     Third class\n",
       "876     Third class\n",
       "877     Third class\n",
       "878     Third class\n",
       "879     First class\n",
       "880    Second class\n",
       "881     Third class\n",
       "882     Third class\n",
       "883    Second class\n",
       "884     Third class\n",
       "885     Third class\n",
       "886    Second class\n",
       "887     First class\n",
       "888     Third class\n",
       "889     First class\n",
       "890     Third class\n",
       "Length: 891, dtype: object"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def which_class(row):\n",
    "    pclass = row['Pclass']\n",
    "    if pd.isnull(pclass):\n",
    "        return 'Unknow'\n",
    "    elif pclass == 1:\n",
    "        return 'First class'\n",
    "    elif pclass == 2:\n",
    "        return 'Second class'\n",
    "    elif pclass == 3:\n",
    "        return 'Third class'\n",
    "classes = df.apply(which_class,axis=1)\n",
    "classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "    row=df.iloc[0]\n",
    "    row['Pclass']\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7       2.00\n",
       "9      14.00\n",
       "10      4.00\n",
       "14     14.00\n",
       "16      2.00\n",
       "22     15.00\n",
       "24      8.00\n",
       "39     14.00\n",
       "43      3.00\n",
       "50      7.00\n",
       "58      5.00\n",
       "59     11.00\n",
       "63      4.00\n",
       "68     17.00\n",
       "71     16.00\n",
       "78      0.83\n",
       "84     17.00\n",
       "86     16.00\n",
       "111    14.50\n",
       "114    17.00\n",
       "119     2.00\n",
       "125    12.00\n",
       "138    16.00\n",
       "147     9.00\n",
       "156    16.00\n",
       "163    17.00\n",
       "164     1.00\n",
       "165     9.00\n",
       "171     4.00\n",
       "172     1.00\n",
       "       ...  \n",
       "691     4.00\n",
       "720     6.00\n",
       "721    17.00\n",
       "731    11.00\n",
       "746    16.00\n",
       "750     4.00\n",
       "751     6.00\n",
       "755     0.67\n",
       "764    16.00\n",
       "777     5.00\n",
       "780    13.00\n",
       "781    17.00\n",
       "787     8.00\n",
       "788     1.00\n",
       "791    16.00\n",
       "802    11.00\n",
       "803     0.42\n",
       "813     6.00\n",
       "819    10.00\n",
       "824     2.00\n",
       "827     1.00\n",
       "830    15.00\n",
       "831     0.83\n",
       "841    16.00\n",
       "844    17.00\n",
       "850     4.00\n",
       "852     9.00\n",
       "853    16.00\n",
       "869     4.00\n",
       "875    15.00\n",
       "Name: Age, Length: 113, dtype: float64"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def is_minor(row):\n",
    "    if row['Age'] < 18:\n",
    "        return True\n",
    "    else:\n",
    "        return False\n",
    "minors = df.apply(is_minor,axis = 1)\n",
    "df['Age'][minors]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
